/**********************************************************************/
/*
   Author: Karan Makkar
   Created: Jan 2024
   Updated: Aug 2025, by Youssef Assarssah
   Description: Misc Calculations Referenced in the Paper
*/
/**********************************************************************/

/*----------------------------------------------------*/
* Section: Setup
/*----------------------------------------------------*/

* Pull in the filepath definitions unless the master_run global is "1"
if "$master_run" !="1" include "./Do/SET_FILEPATHS.do"

* Session settings
clear all
set matsize 11000
set more off

* Close any log that is still open, then start a text log whose
* file name begins with the date stamp stored in the prefix global
capture log close
global prefix: display %tdCYND td(`c(current_date)')
log using "$KP_logs/${prefix}_misc_calculations.txt", text replace

* Define one global per survey wave, named by the wave code (e.g. aug20),
* holding the path to that wave's deid_clean_merged .dta file

* Sakernas waves
foreach wave in aug18 feb19 aug19 feb20 aug20 feb21 aug21 {
	global `wave' "$KP_deid_sakernas/Clean/sak_`wave'_deid_clean_merged.dta"
}

* Susenas waves
foreach wave in mar18 sep18 mar19 mar20 sep20 mar21 sep21 mar22 {
	global `wave' "$KP_deid_susenas/Clean/sus_`wave'_deid_clean_merged.dta"
}

/*----------------------------------------------------*/
* Section: Prepare Batch Schedule Data
/*----------------------------------------------------*/

* Read the batch opening schedule sheet, taking variable names from the first row
import excel "$KP_deid_admin/Raw/00 Kartu Prakerja Batch Schedule.xlsx", sheet("Batch opening schedule") firstrow clear

* Only the batch label and its announcement date are used
keep batch announcement

* Remove asterisks from the batch labels so they can be converted to numbers,
* then discard rows that have no batch
replace batch = subinstr(batch, "*", "", .)
destring batch, replace
drop if missing(batch)

* Show the announcement variable as a calendar date
format announcement %tdMon_DD_CCYY

* Rename the key to batch_treated_39, the variable the schedule is merged on later
rename batch batch_treated_39

* Park the schedule in a temporary file
tempfile batch_dates
save `batch_dates'

/*----------------------------------------------------*/
* Section: Share of Courses Online
/*----------------------------------------------------*/

* 2022 survey file, restricted to completed, verified, consented responses
use "$KP_deid_survey/2022/Clean/072125_survey_21_22_deid_clean_control_merged.dta", clear 
keep if completed == 1 & verified == 1 & consented == 1 

* One row per anon_id4
bysort anon_id4: drop if _n > 1

* For every delivery mode and course slot 1-5, multiply the mode
* indicator by the pay_kp variable of the same slot
foreach mode in online online_live offline {
	forvalues slot = 1/5 {
		generate course`slot'_`mode'_kp = course`slot'_`mode' * course`slot'_pay_kp
	}
}

* Per-respondent course counts by mode
foreach mode in online online_live offline {
	egen courses_total_`mode' = rowtotal(course*_`mode'_kp)
}

* Sample-wide course counts by mode (the same value on every row)
foreach mode in online online_live offline {
	egen courses_total_`mode'_sum = total(courses_total_`mode')
}

* Denominator: the three mode totals added together
egen courses_total_sum = rowtotal(courses_total_online_sum courses_total_online_live_sum courses_total_offline_sum)

* Share of all counted courses delivered in each mode
foreach mode in online online_live offline {
	generate share_`mode' = courses_total_`mode'_sum / courses_total_sum
}

* Report the shares; each is constant across rows, so the mean is the share itself
summarize share_online
display "Online Asynchronous Share: `r(mean)'"

summarize share_online_live
display "Online Synchronous Share: `r(mean)'"

summarize share_offline
display "Offline Share: `r(mean)'"

* Underlying totals
summarize courses_total*_sum 

/*----------------------------------------------------*/
* Section: Share of Winners who Complete Training,
*          Time to Payment (median, 90th percentile)
/*----------------------------------------------------*/

use "$KP_deid_admin/Clean/pmo_b1-22_clean_long_deid.dta", clear

* Copy each person's latest incentive date onto all of their rows
bysort anon_id4: gegen date_incentive = max(date_incentive), replace

* Restrict to batches 2-22, leaving out batch 15
drop if !inrange(batch, 2, 22) | batch == 15

* Keep each person's highest-numbered remaining batch, and only if they won in it
bysort anon_id4 (batch): keep if _n == _N
keep if win_in_batch == 1

* A non-missing incentive date is taken as having completed training
generate complete_training = !missing(date_incentive)
tabulate complete_training

* Bring in the announcement date from the batch schedule, keeping matched rows only
fmerge m:1 batch_treated_39 using `batch_dates', keep(3) nogen

* Gap between announcement and incentive date; the detailed summary lists percentiles
generate diff = date_incentive - announcement
summarize diff, detail

/*----------------------------------------------------*/
* Section: Num Matches SUS-SAK to Admin
/*----------------------------------------------------*/

* Sakernas: stack the aug20, feb21 and aug21 merged files
use "$KP_deid_sakernas/Clean/sak_aug20_deid_clean_merged.dta", clear
foreach wave in feb21 aug21 {
	append using "$KP_deid_sakernas/Clean/sak_`wave'_deid_clean_merged.dta"
}

* Rows with batch 1-17, then one row per anon_id4
keep if inrange(batch, 1, 17)
bysort anon_id4: drop if _n > 1

count 
display "Total SAK match people: `r(N)'"

* Susenas: stack the sep20, mar21, sep21 and mar22 merged files
use "$KP_deid_susenas/Clean/sus_sep20_deid_clean_merged.dta", clear
foreach wave in mar21 sep21 mar22 {
	append using "$KP_deid_susenas/Clean/sus_`wave'_deid_clean_merged.dta"
}

* Rows with batch 1-18, then one row per anon_id4
keep if inrange(batch, 1, 18)
bysort anon_id4: drop if _n > 1

count 
display "Total SUS match people: `r(N)'"

/*----------------------------------------------------------------*/
* Section: Report Selected by Get Cash
/*----------------------------------------------------------------*/

* Stack the three Sakernas waves followed by the four Susenas waves
use "${aug20}", clear
foreach wave in feb21 aug21 sep20 mar21 sep21 mar22 {
	append using "${`wave'}"
}

* Won before the Sakernas interview: each round uses the ever_win_* variable
* for its last eligible batch, then the value is spread over the person-round
generate win_before_sak = ever_win_3 if sak_round == 5 & inrange(batch, 2, 3)
replace win_before_sak = ever_win_11 if sak_round == 6 & inrange(batch, 2, 11)
replace win_before_sak = ever_win_17 if sak_round == 7 & inrange(batch, 2, 17)
gegen win_before_sak = max(win_before_sak), by(sak_round anon_id4) replace

* Same construction for the Susenas rounds
generate win_before_sus = ever_win_5 if sus_round == 5 & inrange(batch, 2, 5)
replace win_before_sus = ever_win_11 if sus_round == 6 & inrange(batch, 2, 11)
replace win_before_sus = ever_win_18 if sus_round == 7 & inrange(batch, 2, 18)
replace win_before_sus = ever_win_22 if sus_round == 8 & inrange(batch, 2, 22)
gegen win_before_sus = max(win_before_sus), by(sus_round anon_id4) replace

* Latest incentive date per person, and a flag for having none at all
bysort anon_id4: gegen date_incentive = max(date_incentive), replace
generate mi_cashdate = missing(date_incentive)

* Susenas self-report: rounds 5 and 8 take get_pk, rounds 6 and 7 take hh_pk_win
replace report_selected = get_pk if inlist(sus_round, 5, 8)
replace report_selected = hh_pk_win if inlist(sus_round, 6, 7)

* Batches 1 and 15 were not randomized
drop if inlist(batch, 1, 15)

* One row per person and survey round
bysort anon_id4 sus_round sak_round: drop if _n > 1

* Compare self-reported selection between winners with and without an incentive date
ttest report_selected if win_before_sus==1, by(mi_cashdate)
ttest report_selected if win_before_sak==1, by(mi_cashdate)
ttest report_selected if win_before_sus==1 | win_before_sak ==1, by(mi_cashdate)
tabulate mi_cashdate if win_before_sus==1 | win_before_sak ==1

/*----------------------------------------------------------------*/
* Section: Education by Demog Match
/*----------------------------------------------------------------*/

* Sakernas: load data and append
use "${aug20}", clear
append using "${feb21}"
append using "${aug21}"

* Merge in demographics match flags; every row in memory must find a match
* (assert(2 3)) and only matched rows are kept
fmerge m:1 anon_id4 sak_round using "$KP_deid_sakernas/Clean/age_gender_educ_match_ids.dta", assert(2 3) keep(3) nogen keepusing(match3_sus educmatch)

tempfile sak
sa `sak', replace

* Susenas: load data and append
use "${sep20}", clear
append using "${mar21}"
append using "${sep21}"
append using "${mar22}"

* Merge in demographics match flags, same rules as above
fmerge m:1 anon_id4 sus_round using "$KP_deid_susenas/Clean/age_gender_educ_match_ids.dta", assert(2 3) keep(3) nogen keepusing(match3_sus educmatch)

* Combine both surveys
append using `sak'

* Keep 1 obs per person-wave
bysort anon_id4 sus_round sak_round: keep if _n ==1

* T-tests of years of schooling by match status
ttest school_years if inrange(sus_round, 5, 8), by(educmatch)
ttest school_years if inrange(sak_round, 5, 7), by(educmatch)
ttest school_years, by(match3_sus)

* Plot distributions
* The histograms spell out school_years, the variable used in the t-tests,
* rather than depending on variable-name abbreviation. Each graph is given
* its own name so that the SAK plot does not replace the SUS plot.

* SUS
twoway (hist school_years if educmatch ==0 & inrange(sus_round, 5, 8), percent color(navy%30)) ///
(hist school_years if educmatch ==1 & inrange(sus_round, 5, 8), percent color(cranberry%30)), ///
legend(order(1 "Not Matched" 2 "Matched"))  ///
xtitle("Years of School") name(educ_hist_sus, replace)

* SAK
twoway (hist school_years if educmatch ==0 & inrange(sak_round, 5, 7), percent color(navy%30)) ///
(hist school_years if educmatch ==1 & inrange(sak_round, 5, 7), percent color(cranberry%30)), ///
legend(order(1 "Not Matched" 2 "Matched"))  ///
xtitle("Years of School") name(educ_hist_sak, replace)

/*----------------------------------------------------*/
* Section: Share Mar '20 SUS respondents with bank account
/*----------------------------------------------------*/
u "$KP_deid_susenas/Clean/sus_mar20_deid_clean.dta", clear

* Flag rows that carry an anon_id4
gen kp = !mi(anon_id4)

* Aggregate to the renum group (presumably the household identifier; confirm):
* the group gets a 1 if any of its rows has one. savings_acc is aggregated in
* place so that the tabulations below report the group-level value. Previously
* the maximum went into an unused variable and the tabulations picked up
* savings_acc from whichever row happened to be kept.
bysort renum: gegen savings_acc = max(savings_acc), replace
bysort renum: gegen kp = max(kp), replace

* One row per renum
bysort renum: keep if _n ==1

* Frequency weights must be integers
replace weight = round(weight)

* All groups, unweighted and weighted
tab savings_acc
tab savings_acc [fw = weight]

* Groups containing at least one row with an anon_id4
tab savings_acc if kp ==1
tab savings_acc if kp ==1 [fw = weight]

/*----------------------------------------------------*/
* Section: Payment Method (2021 Survey)
/*----------------------------------------------------*/

* Frequency table of survey item kf04 (presumably the payment-method question; confirm against the codebook)
use "$KP_deid_survey/2021/Clean/071323_survey_21_deid_clean.dta", clear
tabulate kf04

/*----------------------------------------------------*/
* Section: Total applier/winner count, batches 1-22
/*----------------------------------------------------*/

use "$KP_deid_admin/Clean/pmo_b1-22_clean_long_deid.dta", clear

* Batches 1-22 only, then one row per anon_id4
drop if !inrange(batch, 1, 22)
bysort anon_id4: drop if _n > 1

* Halt if any remaining row has no value of bobot
count if missing(bobot)
assert r(N) == 0

* Number of people, followed by the breakdown by ever_win_22
count
tabulate ever_win_22

/*----------------------------------------------------*/
* Section: Formal/Informal Employment Share
/*----------------------------------------------------*/

use "$KP_deid_sakernas/Clean/sak_feb20_deid_clean.dta",  clear

* formal2 starts at 0 for the employed and is set to 1 whenever b5_r31 is 1 or 2
* (a part- or full-time work contract, per the original note on this step)
generate formal2 = 0 if employed == 1
replace formal2 = 1 if inrange(b5_r31, 1, 2)

* formal is formal2 and additionally set to 1 whenever employment_status is 3
generate formal = formal2
replace formal = 1 if employment_status == 3

tabulate formal
tabulate formal2

/*----------------------------------------------------*/
* Section: Share Report Win
/*----------------------------------------------------*/

use "${aug21}", clear

* One row per anon_id4, restricted to those with ever_win_17 equal to 1
bysort anon_id4: drop if _n > 1
keep if ever_win_17 == 1

* Distribution of the self-reported selection variable among them
tabulate report_selected


/*----------------------------------------------------*/
* Section: Share Matched by Report Won
/*----------------------------------------------------*/

*** Sakernas ***

* Stack the three Sakernas waves
use "${aug20}", clear
foreach wave in feb21 aug21 {
	append using "${`wave'}"
}

* Attach match3_sus from the demographics match file; matched rows only
fmerge m:1 anon_id4 sak_round using "$KP_deid_sakernas/Clean/age_gender_educ_match_ids.dta", assert(2 3) keep(3) nogen keepusing(match3_sus)

* One row per person and round
bysort anon_id4 sak_round: drop if _n > 1

* Mean of match3_sus by self-reported selection, then for batch 16 alone
summarize match3_sus if report_selected ==0
summarize match3_sus if report_selected ==1

summarize match3_sus if batch ==16

*** Susenas ***

* Stack the four Susenas waves
use "${sep20}", clear
foreach wave in mar21 sep21 mar22 {
	append using "${`wave'}"
}

* Attach match3_sus from the demographics match file; matched rows only
fmerge m:1 anon_id4 sus_round using "$KP_deid_susenas/Clean/age_gender_educ_match_ids.dta", assert(2 3) keep(3) nogen keepusing(match3_sus)

* One row per person and round
bysort anon_id4 sus_round: drop if _n > 1

* Rebuild the self-report variable from scratch:
* rounds 5 and 8 take get_pk, rounds 6 and 7 take hh_pk_win
capture drop report_selected
generate report_selected =.
replace report_selected = get_pk if inlist(sus_round, 5, 8)
replace report_selected = hh_pk_win if inlist(sus_round, 6, 7)

* Mean of match3_sus by self-reported selection
summarize match3_sus if report_selected ==0
summarize match3_sus if report_selected ==1

/*----------------------------------------------------*/
* Section: Job Search Mean for Employed
/*----------------------------------------------------*/

* 2022 survey file, restricted to completed, verified, consented responses
u "$KP_deid_survey/2022/Clean/072125_survey_21_22_deid_clean_control_merged.dta", clear
keep if completed == 1 & verified == 1 & consented == 1 

* Drop non-randomized batches
drop if inlist(batch, 1, 15)

* Mean of job_search for employed respondents in batch 18 or later.
* Stata ranks missing values above every number, so batch >= 18 on its own
* would also admit rows with no batch recorded; they are excluded explicitly.
summ job_search if batch >= 18 & !missing(batch) & employed ==1

/*----------------------------------------------------*/
* Section: Pre-Program Respondent Count
/*----------------------------------------------------*/

* Start from the mar18 file, then add only anon_id4 from each remaining
* 2018-2020 wave
use "${mar18}", clear
foreach wave in mar19 mar20 sep18 aug18 aug19 feb19 feb20 {
	append using "${`wave'}", keep(anon_id4)
}

* Number of distinct anon_id4 values across the stacked waves
unique anon_id4 

/*----------------------------------------------------*/
* Section: Online Survey Response Rates
/*----------------------------------------------------*/

* 2022: a respondent is a completed, verified, consented response
use "$KP_deid_survey/2022/Clean/072125_survey_21_22_deid_clean_control_merged.dta", clear 
generate respondent = completed == 1 & verified == 1 & consented == 1 

* Rates by win_in_batch on all rows, then overall on one row per anon_id4
tabulate respondent if win_in_batch ==1
tabulate respondent if win_in_batch ==0
bysort anon_id4: drop if _n > 1
tabulate respondent

* 2021: one row per anon_id4, limited to rows with sent_survey equal to 1
* or a non-missing progress value
use "$KP_deid_survey/2021/Clean/071323_survey_21_deid_clean_merged.dta", clear 
bysort anon_id4: drop if _n > 1
keep if sent_survey == 1 | !missing(progress)
generate respondent = completed == 1 & verified == 1 & consented == 1 
tabulate respondent